library(data.table)
library(ggplot2)
library(plotly)
library(dplyr)
library(imbalance)
library(class)

# Goal: detect credit card fraud.
# Problem: severe class imbalance (492 frauds vs 284,315 legitimate rows).
# Techniques considered: oversampling (MWMOTE - Majority Weighted Minority
# Oversampling TEchnique), undersampling, SMOTE; then model testing.

# Load the transaction data and convert the response to a factor so that
# downstream functions (knn, glm with binomial family) treat this as a
# classification problem rather than regression.
df <- fread('creditcard.csv', header = TRUE)  # TRUE, not the reassignable alias T
df$Class <- factor(df$Class)
head(df)
summary(df$Class)
# Console output (kept for reference):
#      0      1
# 284315    492
# Confirm there are no missing values anywhere in the table.
anyNA(df)
# [1] FALSE
# Standardise Time and Amount; the V1-V28 PCA components are already scaled.
# BUG FIX: scale() returns a 1-column matrix, and the original piped it
# through as.data.frame(), embedding a data.frame inside each column.
# Flatten back to a plain numeric vector so knn()/glm() see atomic columns.
df$Time <- as.numeric(scale(df$Time))
df$Amount <- as.numeric(scale(df$Amount))
str(df)
Classes ‘data.table’ and 'data.frame':  284807 obs. of  31 variables:
 $ Time  : num  -2 -2 -2 -2 -2 ...
 $ V1    : num  -1.36 1.192 -1.358 -0.966 -1.158 ...
 $ V2    : num  -0.0728 0.2662 -1.3402 -0.1852 0.8777 ...
 $ V3    : num  2.536 0.166 1.773 1.793 1.549 ...
 $ V4    : num  1.378 0.448 0.38 -0.863 0.403 ...
 $ V5    : num  -0.3383 0.06 -0.5032 -0.0103 -0.4072 ...
 $ V6    : num  0.4624 -0.0824 1.8005 1.2472 0.0959 ...
 $ V7    : num  0.2396 -0.0788 0.7915 0.2376 0.5929 ...
 $ V8    : num  0.0987 0.0851 0.2477 0.3774 -0.2705 ...
 $ V9    : num  0.364 -0.255 -1.515 -1.387 0.818 ...
 $ V10   : num  0.0908 -0.167 0.2076 -0.055 0.7531 ...
 $ V11   : num  -0.552 1.613 0.625 -0.226 -0.823 ...
 $ V12   : num  -0.6178 1.0652 0.0661 0.1782 0.5382 ...
 $ V13   : num  -0.991 0.489 0.717 0.508 1.346 ...
 $ V14   : num  -0.311 -0.144 -0.166 -0.288 -1.12 ...
 $ V15   : num  1.468 0.636 2.346 -0.631 0.175 ...
 $ V16   : num  -0.47 0.464 -2.89 -1.06 -0.451 ...
 $ V17   : num  0.208 -0.115 1.11 -0.684 -0.237 ...
 $ V18   : num  0.0258 -0.1834 -0.1214 1.9658 -0.0382 ...
 $ V19   : num  0.404 -0.146 -2.262 -1.233 0.803 ...
 $ V20   : num  0.2514 -0.0691 0.525 -0.208 0.4085 ...
 $ V21   : num  -0.01831 -0.22578 0.248 -0.1083 -0.00943 ...
 $ V22   : num  0.27784 -0.63867 0.77168 0.00527 0.79828 ...
 $ V23   : num  -0.11 0.101 0.909 -0.19 -0.137 ...
 $ V24   : num  0.0669 -0.3398 -0.6893 -1.1756 0.1413 ...
 $ V25   : num  0.129 0.167 -0.328 0.647 -0.206 ...
 $ V26   : num  -0.189 0.126 -0.139 -0.222 0.502 ...
 $ V27   : num  0.13356 -0.00898 -0.05535 0.06272 0.21942 ...
 $ V28   : num  -0.0211 0.0147 -0.0598 0.0615 0.2152 ...
 $ Amount: num  0.245 -0.3425 1.1607 0.1405 -0.0734 ...
 $ Class : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, ".internal.selfref")=<externalptr> 
head(df)

# rbind: combines data frames by rows.
# Pasted console output, kept as comments:
# No Fraud    Fraud
#   199020   199020
# [1] 12

# Undersample the majority class so positives make up 50% of the result
# ("percPos" keeps every minority case and randomly drops majority cases).
# NOTE: the original loaded library(unbalanced) twice and also pulled in the
# ubIonosphere demo dataset (and n <- ncol(ubIonosphere)) that was never
# used afterwards; both removed.
library(unbalanced)
df.ubUnder <- ubUnder(X = df[, -31], Y = df$Class, perc = 50, method = "percPos")
newData <- cbind(df.ubUnder$X, Class = df.ubUnder$Y)
newData
summary(newData$Class)
# Console output:
#   0   1
# 492 492
# Reproducible, class-stratified 70/30 split of the balanced data.
# sample.split() returns TRUE for rows assigned to the training set.
set.seed(1234)
train.id <- caTools::sample.split(newData$Class, SplitRatio = 0.70)
validate.id <- !train.id

newData.train <- subset(newData, train.id)
newData.validate <- subset(newData, validate.id)

# Matching label vectors for the two partitions.
newData.train.class <- newData$Class[train.id]
newData.validate.class <- newData$Class[validate.id]

# Fit k-NN with the given k on the training split and score it on the
# validation split. Reads newData.train / newData.validate and the matching
# class vectors from the enclosing environment; columns 1:30 are predictors.
# Returns a named numeric vector: Accuracy, Sensitivity, Specificity.
find.optimum.k <- function(k) {
  predicted <- knn(newData.train[, 1:30], newData.validate[, 1:30],
                   newData.train.class, k = k)
  confusion.table <- table(predicted, newData.validate.class)

  # Rearranged 2x2 matrix: rows = actual class ("Fraud" / "No Fraud"),
  # columns = prediction (pred_Y / pred_N).
  confusion.matrix <- data.frame(
    pred_Y = c(confusion.table[2, 2], confusion.table[2, 1]),  # TP, FP
    pred_N = c(confusion.table[1, 2], confusion.table[1, 1]),  # FN, TN
    row.names = c("Fraud", "No Fraud")
  )

  Accuracy <- (confusion.matrix[1, 1] + confusion.matrix[2, 2]) / sum(confusion.matrix)
  # BUG FIX: the original divided by column sums, which yields precision
  # TP/(TP+FP) and NPV TN/(TN+FN) instead of the advertised metrics.
  # Sensitivity = TP / (TP + FN): fraction of actual frauds detected.
  Sensitivity <- confusion.matrix[1, 1] / sum(confusion.matrix[1, ])
  # Specificity = TN / (TN + FP): fraction of legitimate rows kept.
  Specificity <- confusion.matrix[2, 2] / sum(confusion.matrix[2, ])

  c(Accuracy = Accuracy, Sensitivity = Sensitivity, Specificity = Specificity)
}
# Evaluate k = 1..20 and reshape into a data frame with one row per k
# and one column per metric.
vec.k <- seq_len(20)
results <- sapply(vec.k, find.optimum.k)
# sapply() gives a 3 x 20 matrix (metrics x k); transpose so metrics
# become columns.
results <- as.data.frame(t(results))
results$k <- vec.k

# Overlay the three performance curves against k.
pl <- ggplot(data = results) +
  geom_line(aes(x = k, y = Accuracy), size = 1, color = "red") +
  geom_line(aes(x = k, y = Sensitivity), size = 1, color = "blue") +
  geom_line(aes(x = k, y = Specificity), size = 1, color = "green") +
  ylab('performance') +
  theme_bw()
library(plotly)
ggplotly(pl)

# Console output (best k-NN confusion matrix on the validation split):
# NA
# NA
#           newData.validate.class
# predictied   0   1
#          0 140  15
#          1   8 133
# Full logistic regression on all 30 predictors.
# NOTE: glm warns about non-convergence / fitted probabilities of 0 or 1 --
# the undersampled classes are (quasi-)separable, so the coefficients are
# unstable (see the huge standard errors in the summary below).
logistic.model <- glm(Class ~ ., family = binomial, data = newData.train)
# glm.fit: algorithm did not converge
# glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logistic.model)

Call:
glm(formula = Class ~ ., family = binomial, data = newData.train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4321  -0.1517   0.0000   0.0000   3.1347  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)
(Intercept)   -3.20140   10.64088  -0.301    0.764
Time           0.07075    0.43061   0.164    0.869
V1            -2.50412   23.71586  -0.106    0.916
V2            33.73531  152.15264   0.222    0.825
V3           -26.00857   60.49761  -0.430    0.667
V4            19.61983   48.62975   0.403    0.687
V5            -9.78592   12.04276  -0.813    0.416
V6           -16.83303   69.15649  -0.243    0.808
V7           -63.92582  238.74546  -0.268    0.789
V8            12.63729   41.30687   0.306    0.760
V9           -24.38046   73.20700  -0.333    0.739
V10          -56.13230  168.24894  -0.334    0.739
V11           42.71157  142.33152   0.300    0.764
V12          -76.32411  255.61469  -0.299    0.765
V13           -0.60973    6.81015  -0.090    0.929
V14          -82.27881  278.83220  -0.295    0.768
V15           -2.72018    9.96380  -0.273    0.785
V16          -72.72521  246.07846  -0.296    0.768
V17         -128.32083  432.01053  -0.297    0.766
V18          -48.80905  165.07732  -0.296    0.767
V19           19.23932   68.36637   0.281    0.778
V20           -8.27774   46.82875  -0.177    0.860
V21            7.45454   15.14105   0.492    0.622
V22            6.12643   30.52881   0.201    0.841
V23           16.01833   91.45287   0.175    0.861
V24           -1.73290    8.72275  -0.199    0.843
V25            8.61549   41.64026   0.207    0.836
V26            0.87876   10.51042   0.084    0.933
V27            6.89893   32.92813   0.210    0.834
V28           20.81647  110.84703   0.188    0.851
Amount        49.53423  264.32338   0.187    0.851

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 953.77  on 687  degrees of freedom
Residual deviance: 131.48  on 657  degrees of freedom
AIC: 193.48

Number of Fisher Scoring iterations: 25
# Score the full logistic model on the validation split.
# The low 0.1 threshold deliberately favours catching frauds over
# minimising false alarms.
pred <- predict(logistic.model, newdata = newData.validate, type = "response")
perf.table <- table(newData.validate$Class, pred > 0.1)

# Rearranged 2x2: rows = actual (act_T / act_F), cols = predicted (pred_T / pred_F).
conf.mat <- data.frame(pred_T = c(perf.table[2, 2], perf.table[1, 2]),  # TP, FP
                       pred_F = c(perf.table[2, 1], perf.table[1, 1]),  # FN, TN
                       row.names = c("act_T", "act_F"))
ACC <- (conf.mat[1, 1] + conf.mat[2, 2]) / sum(conf.mat)
# BUG FIX: the original divided by the predicted-positive column total,
# which is precision TP/(TP+FP), not sensitivity TP/(TP+FN).
Sensitivity <- conf.mat[1, 1] / sum(conf.mat[1, ])
# Specificity = TN / (TN + FP).
Specificity <- conf.mat[2, 2] / (conf.mat[2, 1] + conf.mat[2, 2])

df.perf <- data.frame(ACC, Sensitivity, Specificity)
df.perf

# Backward/forward stepwise variable selection by AIC from the full model.
library(MASS)
step <- stepAIC(logistic.model, direction = "both", trace = TRUE)
Start:  AIC=193.48
Class ~ Time + V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + 
    V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + 
    V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + Amount
glm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance     AIC
- V1      1    131.5   191.5
- V5      1    131.5   191.5
- V26     1    131.5   191.5
- V13     1    131.5   191.5
- Time    1    131.5   191.5
- V20     1    131.9   191.9
- V27     1    132.2   192.2
- V23     1    132.5   192.5
- V24     1    133.0   193.0
- Amount  1    133.4   193.4
<none>         131.5   193.5
- V28     1    134.7   194.7
- V8      1    136.1   196.1
- V7      1    137.9   197.9
- V15     1    138.1   198.1
- V9      1    138.9   198.9
- V6      1    139.5   199.5
- V19     1    140.8   200.8
- V18     1    141.1   201.1
- V17     1    141.6   201.6
- V16     1    144.6   204.6
- V10     1    144.6   204.6
- V12     1    145.9   205.9
- V11     1    148.1   208.1
- V14     1    172.9   232.9
- V2      1   2451.0  2511.0
- V3      1   2523.1  2583.1
- V4      1   3243.9  3303.9
- V21     1   3316.0  3376.0
- V25     1   4181.1  4241.1
- V22     1  20784.9 20844.9
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=191.48
Class ~ Time + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + 
    V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + 
    V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + Amount
glm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance    AIC
- Time    1    131.5  189.5
- V27     1    132.3  190.3
- V13     1    132.4  190.4
<none>         131.5  191.5
- V24     1    133.7  191.7
- V28     1    134.9  192.9
+ V1      1    131.5  193.5
- V23     1    136.0  194.0
- V22     1    136.1  194.1
- V8      1    136.5  194.5
- V2      1    136.6  194.6
- V21     1    138.2  196.2
- V5      1    140.2  198.2
- V3      1    141.7  199.7
- V19     1    142.6  200.6
- V6      1    142.7  200.7
- V9      1    142.8  200.8
- V20     1    143.8  201.8
- V7      1    144.2  202.2
- V17     1    144.9  202.9
- V18     1    145.3  203.3
- V16     1    147.2  205.2
- V12     1    147.5  205.5
- V10     1    149.3  207.3
- V11     1    150.8  208.8
- V4      1    174.0  232.0
- V14     1    175.3  233.3
- V26     1   2018.4 2076.4
- V15     1   2523.1 2581.1
- V25     1   2667.2 2725.2
- Amount  1   4541.5 4599.5
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=189.5
Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + 
    V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + 
    V23 + V24 + V25 + V26 + V27 + V28 + Amount
glm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance    AIC
- V27     1    132.4  188.4
- V13     1    132.6  188.6
<none>         131.5  189.5
- V24     1    133.8  189.8
- V28     1    135.0  191.0
+ Time    1    131.5  191.5
+ V1      1    131.5  191.5
- V23     1    136.1  192.1
- V22     1    136.3  192.3
- V8      1    136.6  192.6
- V2      1    136.7  192.7
- V21     1    138.7  194.7
- V5      1    140.3  196.3
- V19     1    142.7  198.7
- V3      1    142.7  198.7
- V6      1    142.8  198.8
- V9      1    143.0  199.0
- V20     1    143.9  199.9
- V7      1    144.3  200.3
- V17     1    145.1  201.1
- V18     1    145.3  201.3
- V16     1    147.3  203.3
- V12     1    147.5  203.5
- V10     1    149.3  205.3
- V11     1    151.0  207.0
- V14     1    175.3  231.3
- V4      1    175.6  231.6
- V25     1   1946.4 2002.4
- V15     1   2667.2 2723.2
- V26     1   3243.9 3299.9
- Amount  1   6559.9 6615.9
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=188.42
Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + 
    V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + 
    V23 + V24 + V25 + V26 + V28 + Amount
glm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance    AIC
- V13     1    132.6  186.6
- V26     1    132.7  186.7
- V24     1    133.8  187.8
<none>         132.4  188.4
+ V27     1    131.5  189.5
- V28     1    136.0  190.0
+ Time    1    132.3  190.3
+ V1      1    132.3  190.3
- V22     1    137.2  191.2
- V21     1    139.0  193.0
- V5      1    140.9  194.9
- V19     1    143.6  197.6
- V9      1    143.6  197.6
- V20     1    143.9  197.9
- V3      1    144.1  198.1
- V7      1    146.3  200.3
- V17     1    146.7  200.7
- V18     1    147.0  201.0
- V6      1    147.6  201.6
- V16     1    149.3  203.3
- V12     1    149.4  203.4
- V10     1    149.4  203.4
- V11     1    153.3  207.3
- V4      1    176.3  230.3
- V14     1    181.7  235.7
- V25     1   2667.2 2721.2
- V8      1   2739.3 2793.3
- V2      1   3243.9 3297.9
- Amount  1   3388.1 3442.1
- V23     1   3604.4 3658.4
- V15     1   3892.7 3946.7
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=186.57
Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + 
    V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + V23 + 
    V24 + V25 + V26 + V28 + Amount
glm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance    AIC
- V26     1    132.9  184.9
- V24     1    133.8  185.8
<none>         132.6  186.6
- V28     1    136.1  188.1
+ Time    1    132.4  188.4
+ V13     1    132.4  188.4
+ V27     1    132.6  188.6
- V22     1    137.5  189.5
- V23     1    137.9  189.9
- V8      1    138.5  190.5
- V25     1    138.7  190.7
- V2      1    138.7  190.7
- Amount  1    139.5  191.5
- V21     1    139.9  191.9
- V15     1    140.7  192.7
- V5      1    141.2  193.2
- V9      1    143.6  195.6
- V19     1    143.7  195.7
- V20     1    144.1  196.1
- V3      1    144.2  196.2
- V7      1    146.4  198.4
- V17     1    146.8  198.8
- V18     1    147.0  199.0
- V6      1    148.1  200.1
- V10     1    149.4  201.4
- V12     1    149.5  201.5
- V16     1    149.5  201.5
- V11     1    153.5  205.5
- V4      1    176.5  228.5
- V14     1    183.6  235.6
+ V1      1   4901.9 4957.9
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=184.93
Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + 
    V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + V23 + 
    V24 + V25 + V28 + Amount
glm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance     AIC
- V24     1   134.69  184.69
<none>        132.93  184.93
+ V26     1   132.57  186.57
+ V13     1   132.68  186.68
+ Time    1   132.70  186.70
+ V27     1   132.85  186.85
- V28     1   138.13  188.13
- V22     1   140.38  190.38
- V23     1   141.10  191.10
- V25     1   141.64  191.64
- V8      1   141.65  191.65
- V21     1   141.93  191.93
- V2      1   142.46  192.46
- V15     1   142.74  192.74
- Amount  1   143.42  193.42
- V5      1   143.58  193.58
- V9      1   146.58  196.58
- V19     1   146.81  196.81
- V20     1   146.93  196.93
- V3      1   148.16  198.16
- V7      1   148.92  198.92
- V18     1   149.49  199.49
- V17     1   150.65  200.65
- V12     1   152.20  202.20
- V6      1   152.23  202.23
- V10     1   153.39  203.39
- V16     1   154.73  204.73
- V11     1   156.31  206.31
- V4      1   179.82  229.82
- V14     1   184.52  234.52
+ V1      1  2523.06 2577.06
glm.fit: fitted probabilities numerically 0 or 1 occurred

Step:  AIC=184.69
Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + 
    V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + V23 + 
    V25 + V28 + Amount
glm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurredglm.fit: fitted probabilities numerically 0 or 1 occurred
         Df Deviance     AIC
<none>        134.69  184.69
+ V24     1   132.93  184.93
+ V26     1   133.85  185.85
- V28     1   138.24  186.24
+ Time    1   134.52  186.52
+ V27     1   134.68  186.68
- V22     1   140.38  188.38
- V23     1   141.14  189.14
- V25     1   141.64  189.64
- V8      1   141.66  189.66
- V21     1   141.95  189.95
- V2      1   142.56  190.56
- V15     1   142.91  190.91
- Amount  1   143.50  191.50
- V5      1   143.58  191.58
- V9      1   146.82  194.82
- V20     1   146.96  194.96
- V19     1   147.08  195.08
- V3      1   148.24  196.24
- V7      1   148.94  196.94
- V18     1   149.66  197.66
- V17     1   150.71  198.71
- V12     1   152.30  200.30
- V6      1   152.43  200.43
- V10     1   153.48  201.48
- V16     1   154.95  202.95
- V11     1   156.39  204.39
- V4      1   179.87  227.87
- V14     1   184.79  232.79
+ V1      1  2378.88 2430.88
+ V13     1  2955.58 3007.58
# Inspect the coefficients of the AIC-selected model.
summary(step)

Call:
glm(formula = Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + 
    V10 + V11 + V12 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + 
    V21 + V22 + V23 + V25 + V28 + Amount, family = binomial, 
    data = newData.train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.8812  -0.1585   0.0000   0.0000   3.0080  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -4.1915     0.7049  -5.946 2.74e-09 ***
V2           11.8918     4.4644   2.664 0.007729 ** 
V3           -8.3449     2.9446  -2.834 0.004598 ** 
V4            7.5880     2.3582   3.218 0.001292 ** 
V5           -2.6438     1.1723  -2.255 0.024121 *  
V6           -6.6407     2.1444  -3.097 0.001956 ** 
V7          -22.4832     7.9731  -2.820 0.004804 ** 
V8            5.7429     2.0498   2.802 0.005084 ** 
V9           -8.9079     3.2077  -2.777 0.005487 ** 
V10         -20.5296     7.1154  -2.885 0.003911 ** 
V11          15.1334     5.0775   2.980 0.002878 ** 
V12         -26.9351     9.1725  -2.936 0.003319 ** 
V14         -29.6791     9.8085  -3.026 0.002479 ** 
V15          -1.0802     0.3967  -2.723 0.006466 ** 
V16         -25.7191     8.7317  -2.945 0.003224 ** 
V17         -44.5236    15.4379  -2.884 0.003926 ** 
V18         -17.0165     5.8785  -2.895 0.003795 ** 
V19           7.1600     2.4632   2.907 0.003652 ** 
V20          -5.8474     1.7757  -3.293 0.000991 ***
V21           4.1979     1.6353   2.567 0.010256 *  
V22           2.2779     1.0011   2.275 0.022877 *  
V23           5.9484     2.4089   2.469 0.013536 *  
V25           3.1093     1.2414   2.505 0.012258 *  
V28           5.6067     3.3646   1.666 0.095632 .  
Amount       20.1507     7.1488   2.819 0.004821 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 953.77  on 687  degrees of freedom
Residual deviance: 134.69  on 663  degrees of freedom
AIC: 184.69

Number of Fisher Scoring iterations: 19
# Refit the AIC-selected model.
# BUG FIX: the original refitted the *full* predictor set (including the
# variables stepAIC had dropped) and -- worse -- fitted it on
# newData.validate, the very data it is evaluated on below. That data
# leakage plus perfect separation produced the absurd e+14 coefficients in
# the transcript. Fit the selected formula on the training split instead.
logistic_AIC <- glm(formula(step), family = binomial, data = newData.train)
# glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logistic_AIC)

Call:
glm(formula = Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + 
    V10 + V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + 
    V20 + V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + Amount, 
    family = binomial, data = newData.validate)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
 -8.49    0.00    0.00    0.00    8.49  

Coefficients:
              Estimate Std. Error    z value Pr(>|z|)    
(Intercept) -9.536e+14  5.249e+06 -181675792   <2e-16 ***
V2           4.680e+14  5.555e+06   84258151   <2e-16 ***
V3           5.445e+13  3.643e+06   14948307   <2e-16 ***
V4           4.065e+14  3.126e+06  130035833   <2e-16 ***
V5           4.457e+14  5.334e+06   83560078   <2e-16 ***
V6          -1.713e+14  4.615e+06  -37113171   <2e-16 ***
V7           6.566e+12  5.451e+06    1204454   <2e-16 ***
V8          -1.238e+11  2.507e+06     -49374   <2e-16 ***
V9           5.287e+14  4.457e+06  118629461   <2e-16 ***
V10         -3.503e+14  5.734e+06  -61086597   <2e-16 ***
V11          1.669e+14  4.181e+06   39913379   <2e-16 ***
V12          2.246e+14  3.594e+06   62498325   <2e-16 ***
V13         -2.573e+14  4.433e+06  -58053277   <2e-16 ***
V14         -6.228e+13  3.490e+06  -17844088   <2e-16 ***
V15          2.842e+14  4.614e+06   61594632   <2e-16 ***
V16          3.291e+14  5.238e+06   62826892   <2e-16 ***
V17         -2.363e+14  3.667e+06  -64431360   <2e-16 ***
V18         -5.136e+14  5.747e+06  -89367420   <2e-16 ***
V19          3.512e+14  4.410e+06   79633393   <2e-16 ***
V20         -9.236e+13  7.645e+06  -12080093   <2e-16 ***
V21          3.785e+13  3.270e+06   11573155   <2e-16 ***
V22          2.059e+14  6.407e+06   32137775   <2e-16 ***
V23         -6.824e+13  9.310e+06   -7329837   <2e-16 ***
V24         -1.254e+14  7.809e+06  -16052741   <2e-16 ***
V25          1.477e+14  8.252e+06   17903789   <2e-16 ***
V26          3.014e+14  8.790e+06   34286668   <2e-16 ***
V27         -2.955e+14  7.900e+06  -37411823   <2e-16 ***
V28         -5.741e+13  1.727e+07   -3324490   <2e-16 ***
Amount       4.102e+14  1.328e+07   30882173   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance:  410.34  on 295  degrees of freedom
Residual deviance: 2378.88  on 267  degrees of freedom
AIC: 2436.9

Number of Fisher Scoring iterations: 14
pred <- predict(logistic_AIC, newdata = newData.validate, type = "response")
perf.table <- table(newData.validate$Class,pred>.1) 
conf.mat <- data.frame(pred_T=c(perf.table[2,2],perf.table[1,2]), pred_F=c(perf.table[2,1],perf.table[1,1]), row.names = c("act_T","act_F"))
ACC <-  (conf.mat[1,1]+conf.mat[2,2])/sum(conf.mat)
Sensitivity <-  conf.mat[1,1] /sum(conf.mat[,1])
Specificity <-  conf.mat[2,2] / (conf.mat[2,1]+conf.mat[2,2])
df.perf <- data.frame(ACC,Sensitivity,Specificity)
df.perf 
find.opt.cutoff <- function(cutoff) {
  perf.table <- table(newData.validate$Class, pred > cutoff)
  
  if (ncol(perf.table) == 1) {
    if (colnames(perf.table) == "TRUE") {
      perf.table <- cbind(c(0, 0), perf.table)
    }else{
      perf.table <- cbind(perf.table, c(0, 0))
    }
  }
  
  conf.mat <-
    data.frame(
      pred_T = c(perf.table[2, 2], perf.table[1, 2]),
      pred_F = c(perf.table[2, 1], perf.table[1, 1]),
      row.names = c("act_T", "act_F")
    )
  ACC <-  (conf.mat[1, 1] + conf.mat[2, 2]) / sum(conf.mat)
  Sensitivity <-  conf.mat[1, 1] / sum(conf.mat[, 1])
  Specificity <-  conf.mat[2, 2] / (conf.mat[2, 1] + conf.mat[2, 2])
  df.perf <- c(ACC=ACC, Sensitivity=Sensitivity, Specificity=Specificity)
  df.perf
}


cutoff <- seq(0,1,.1)
results <- sapply(cutoff, find.opt.cutoff) %>% t %>% data.frame()
results$cutoff <- cutoff


pl <- ggplot(data=results)+geom_line(aes(x=cutoff,y=ACC),size=1,color="red")+
  geom_line(aes(x=cutoff,y=Sensitivity),size=1,color="blue")+
  geom_line(aes(x=cutoff,y=Specificity),size=1,color="green")+
  ylab('performance')+
  theme_bw()

ggplotly(pl)
---
title: "R Notebook"
output: html_notebook
---
 

```{r}
library(data.table)
library(ggplot2)
library(plotly)
library(dplyr)
library(imbalance)
library(class)
```
My goal is to detect credit card fraud.
The problem is class imbalance.
Over-sampling:
- Majority Weighted Minority Oversampling Technique (MWMOTE)
Under-sampling:
SMOTE
Testing

```{r}
# Load the Kaggle credit-card fraud dataset; Class is 0 = legit, 1 = fraud.
# Use TRUE, not T: T is an ordinary variable and can be reassigned.
df <- fread('creditcard.csv', header = TRUE)
df$Class <- factor(df$Class)

```
```{r}
head(df)
```
```{r}
summary(df$Class)
```
```{r}
is.na(df) %>% any
```
```{r}
# Standardize the two non-PCA columns so they are on the same scale as V1..V28.
# scale() returns a one-column matrix; coerce to a plain numeric vector —
# the original piped through as.data.frame(), which embeds a data.frame
# inside a single column.
df$Time   <- as.numeric(scale(df$Time))
df$Amount <- as.numeric(scale(df$Amount))

```
```{r}
str(df)
```
```{r}
head(df)
```
```{r echo=FALSE}
# newMWMOTE <- mwmote(df, numInstances = 283823)
# head(newMWMOTE)
```
rbind: combines data frames by rows.
```{r echo=FALSE}
# df.newMWMOTE <- rbind(df, newMWMOTE)
# plotComparison(df, rbind(df, df.newMWMOTE), attrs = names(df)[2:3])
```
```{r echo=FALSE}
# set.seed(1234)
# train.id <- caTools::sample.split(df.newMWMOTE$Class, SplitRatio = 0.70) 
# df.newMWMOTE.train <- subset(df.newMWMOTE, train.id)
# df.newMWMOTE.validate <- subset(df.newMWMOTE, !train.id)
# 
# df.newMWMOTE.trainClass <- df.newMWMOTE$Class[train.id]
# df.newMWMOTE.validateClass <- df.newMWMOTE$Class[!train.id]
```
```{r echo=FALSE}
# is.na(df.newMWMOTE.validateClass) %>% any
# train.id
# df.newMWMOTE.train.class
```
```{r echo=FALSE}
# summary(df.newMWMOTE.train$Class)
# df.newMWMOTE.train[, -Class]
```


```{r echo=FALSE}
# library(parallel)
  
# numCores <- detectCores()
```
```{r echo=FALSE}
# numCores
```
```{r echo=FALSE}
# library(parallel)
# cl <- makeCluster(2)
# parLapply(cl, 2:4, sqrt)
```
```{r}
library(unbalanced)
# Package demo dataset, apparently left over from experimentation;
# 'n' is computed here but never used later in the notebook.
data(ubIonosphere)
n<-ncol(ubIonosphere)
```


```{r}
library(unbalanced)
# Random under-sampling: keep all fraud cases (Class column 31 is excluded
# from X) and drop majority rows until positives make up perc = 50% of the
# result ("percPos" method).
df.ubUnder<-ubUnder(X=df[, -31], Y=df$Class, perc = 50,  method = "percPos")

```
```{r}
# Re-attach the class labels to the under-sampled feature matrix.
newData<-cbind(df.ubUnder$X, Class = df.ubUnder$Y)
```
```{r}
newData
```
```{r}
summary(newData$Class)
```
```{r}
set.seed(1234)
# Stratified 70/30 split: sample.split preserves the class ratio in both parts.
train.id <- caTools::sample.split(newData$Class, SplitRatio = 0.70)
newData.train <- subset(newData, train.id)
newData.validate <- subset(newData, !train.id)

# Separate label vectors, needed as the 'cl' argument of class::knn().
newData.train.class <- newData$Class[train.id]
newData.validate.class <- newData$Class[!train.id]
```
```{r}

# Evaluate a k-NN classifier on the validation split for a given k and
# return Accuracy, Sensitivity and Specificity as a named numeric vector.
# Reads the globals newData.train / newData.validate / *.class created above.
find.optimum.k <- function(k) {
  # class::knn fits and predicts in one call on the 30 feature columns.
  predicted <- knn(newData.train[, 1:30], newData.validate[, 1:30],
                   newData.train.class, k = k)
  confusion.table <- table(predicted, newData.validate.class)

  # Rearranged layout: row 1 = actual fraud (TP, FN),
  #                    row 2 = actual non-fraud (FP, TN).
  confusion.matrix <- data.frame(
    pred_Y = c(confusion.table[2, 2], confusion.table[2, 1]),
    pred_N = c(confusion.table[1, 2], confusion.table[1, 1]),
    row.names = c("Fraud", "No Fraud")
  )
  Accuracy <- (confusion.matrix[1, 1] + confusion.matrix[2, 2]) / sum(confusion.matrix)
  # Sensitivity = TP / (TP + FN): divide by the actual-positive ROW.
  # (The original divided by column 1 = TP + FP, which is precision.)
  Sensitivity <- confusion.matrix[1, 1] / sum(confusion.matrix[1, ])
  # Specificity = TN / (TN + FP): divide by the actual-negative ROW.
  # (The original divided by column 2 = FN + TN, which is NPV.)
  Specificity <- confusion.matrix[2, 2] / sum(confusion.matrix[2, ])
  c(Accuracy = Accuracy, Sensitivity = Sensitivity, Specificity = Specificity)
}
```
```{r}
# Sweep k = 1..20 and collect the metrics for plotting.
vec.k <- 1:20
# sapply returns a 3 x 20 matrix (metric x k); a plain transpose gives one
# row per k — the original's apply(results, 1, unlist) was a roundabout t().
results <- sapply(vec.k, find.optimum.k) %>% t() %>% as.data.frame()
results$k <- vec.k

# Overlay the three metrics against k.
pl <- ggplot(data = results) +
  geom_line(aes(x = k, y = Accuracy), size = 1, color = "red") +
  geom_line(aes(x = k, y = Sensitivity), size = 1, color = "blue") +
  geom_line(aes(x = k, y = Specificity), size = 1, color = "green") +
  ylab('performance') +
  theme_bw()

```
```{r}
library(plotly)
ggplotly(pl)


```
```{r echo=FALSE}
  # Re-run k-NN with the chosen k = 1 and keep its confusion table
  # (printed in the next chunk).
  predicted <- knn(newData.train[, 1:30], newData.validate[, 1:30],
                   newData.train.class, k = 1)
  confusion.table <- table(predicted, newData.validate.class)

  # Row 1 = actual fraud (TP, FN); row 2 = actual non-fraud (FP, TN).
  confusion.matrix <- data.frame(
    pred_Y = c(confusion.table[2, 2], confusion.table[2, 1]),
    pred_N = c(confusion.table[1, 2], confusion.table[1, 1]),
    row.names = c("Fraud", "No Fraud")
  )

```
```{r echo=FALSE}
  confusion.table 

 

```
```{r}
# Full logistic regression: Class against every other column ('.') of the
# balanced (under-sampled) training set.
logistic.model = glm(Class ~., family = binomial, data = newData.train)
summary(logistic.model)
```
```{r}
# Score the validation set and threshold the fraud probability at 0.1.
pred <- predict(logistic.model, newdata = newData.validate, type = "response")
perf.table <- table(newData.validate$Class, pred>.1) 
# conf.mat layout: rows = actual (act_T, act_F), cols = predicted (T, F).
conf.mat <- data.frame(pred_T=c(perf.table[2,2],perf.table[1,2]), pred_F=c(perf.table[2,1],perf.table[1,1]), row.names = c("act_T","act_F"))
ACC <-  (conf.mat[1,1]+conf.mat[2,2])/sum(conf.mat)
# Sensitivity = TP/(TP+FN): divide by the actual-positive ROW. The original
# divided by the predicted-positive column (TP+FP), which is precision.
Sensitivity <-  conf.mat[1,1] /sum(conf.mat[1,])
Specificity <-  conf.mat[2,2] / (conf.mat[2,1]+conf.mat[2,2])

df.perf <- data.frame(ACC,Sensitivity,Specificity)
df.perf 
```
```{r}
library(MASS)
# Bidirectional (forward + backward) stepwise selection by AIC, starting
# from the full logistic model; trace = TRUE prints each elimination step.
# Caution: attaching MASS masks dplyr::select(). Use TRUE, not the
# reassignable shorthand T.
step <- stepAIC(logistic.model, direction = "both", trace = TRUE)
```
```{r}
summary(step)
```
```{r}
# Refit the model with the stepAIC-selected predictors.
# BUG FIX: the original used data = newData.validate, i.e. it trained on the
# same hold-out set it is evaluated on in the next chunk — and the tiny
# validation set produced complete separation (the "fitted probabilities
# numerically 0 or 1" warning and the astronomical coefficients in the
# pasted output). Fit on the TRAINING data instead.
logistic_AIC = glm(formula = Class ~ V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + 
    V10 + V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + 
    V20 + V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + Amount,
    family = binomial, data = newData.train)

summary(logistic_AIC)
```
```{r}
# Score the validation set with the AIC-selected model at cutoff 0.1.
pred <- predict(logistic_AIC, newdata = newData.validate, type = "response")
perf.table <- table(newData.validate$Class,pred>.1) 
# Rows = actual (act_T, act_F), cols = predicted (T, F).
conf.mat <- data.frame(pred_T=c(perf.table[2,2],perf.table[1,2]), pred_F=c(perf.table[2,1],perf.table[1,1]), row.names = c("act_T","act_F"))
ACC <-  (conf.mat[1,1]+conf.mat[2,2])/sum(conf.mat)
# Sensitivity = TP/(TP+FN): actual-positive ROW; the original divided by the
# predicted-positive column (TP+FP), which computes precision instead.
Sensitivity <-  conf.mat[1,1] /sum(conf.mat[1,])
Specificity <-  conf.mat[2,2] / (conf.mat[2,1]+conf.mat[2,2])
df.perf <- data.frame(ACC,Sensitivity,Specificity)
df.perf 
```
```{r}
# Compute ACC / Sensitivity / Specificity for one probability cutoff.
# Generalized: probs and actual default to the globals the notebook uses
# (pred and newData.validate$Class), so existing calls are unchanged, but
# the function can now be tested and reused with explicit arguments.
find.opt.cutoff <- function(cutoff, probs = pred, actual = newData.validate$Class) {

  # Cross-tabulate actual class against thresholded probabilities.
  perf.table <- table(actual, probs > cutoff)

  # At extreme cutoffs every prediction falls on one side and table()
  # yields a single column; pad the missing side with zeros so the
  # 2 x 2 indexing below stays valid.
  if (ncol(perf.table) == 1) {
    if (colnames(perf.table) == "TRUE") {
      perf.table <- cbind(c(0, 0), perf.table)
    } else {
      perf.table <- cbind(perf.table, c(0, 0))
    }
  }

  # Rows = actual (act_T, act_F), cols = predicted (T, F).
  conf.mat <-
    data.frame(
      pred_T = c(perf.table[2, 2], perf.table[1, 2]),
      pred_F = c(perf.table[2, 1], perf.table[1, 1]),
      row.names = c("act_T", "act_F")
    )
  ACC <- (conf.mat[1, 1] + conf.mat[2, 2]) / sum(conf.mat)
  # Sensitivity = TP / (TP + FN): actual-positive ROW. The original divided
  # by the predicted-positive column (TP + FP), which is precision.
  Sensitivity <- conf.mat[1, 1] / sum(conf.mat[1, ])
  Specificity <- conf.mat[2, 2] / (conf.mat[2, 1] + conf.mat[2, 2])
  c(ACC = ACC, Sensitivity = Sensitivity, Specificity = Specificity)
}


# Sweep classification cutoffs from 0 to 1 in steps of 0.1.
cutoff <- seq(0, 1, by = 0.1)

# One row of (ACC, Sensitivity, Specificity) per cutoff value.
results <- data.frame(t(sapply(cutoff, find.opt.cutoff)))
results$cutoff <- cutoff

# Overlay the three performance curves against the cutoff.
pl <- ggplot(data = results) +
  geom_line(aes(x = cutoff, y = ACC), size = 1, color = "red") +
  geom_line(aes(x = cutoff, y = Sensitivity), size = 1, color = "blue") +
  geom_line(aes(x = cutoff, y = Specificity), size = 1, color = "green") +
  ylab('performance') +
  theme_bw()

ggplotly(pl)
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```
```{r}
```

